# -*- coding: utf-8 -*-
import copy
import random
from datetime import datetime
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from zc_core.dao.catalog_dao import CatalogDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from powerchina.rules import *
from zc_core.spiders.base import BaseSpider


class SkuSpider(BaseSpider):
    """Spider that walks level-3 catalog categories and collects SKU lists.

    For every level-3 category taken from the catalog pool, page 1 of the
    category listing is requested. The page-1 response is used to discover
    the total number of pages, after which pages 2..N are requested. Every
    non-empty page yields a ``Box('sku', batch_no, sku_list)``.
    """

    name = 'sku'
    # Category listing URL; placeholders are (category id, page number).
    sku_list_url = "https://emall.powerchina.cn/eshop/front/goods/categoryForJdZb.do?level=3&is_leaf=1&id={}&page.currentPage={}"

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider.

        :param batchNo: batch number forwarded unchanged to ``BaseSpider``.
        """
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # NOTE(review): not read anywhere in this class; kept because the
        # base class or other components may rely on it -- confirm.
        self.page_size = 100

    def _build_list_request(self, cat3_id, page):
        """Build the listing request for one page of one level-3 category."""
        return Request(
            url=self.sku_list_url.format(cat3_id, page),
            meta={
                'reqType': 'sku',
                'batchNo': self.batch_no,
                'page': page,
                'cat3Id': cat3_id,
            },
            callback=self.parse_sku,
            errback=self.error_back,
        )

    def start_requests(self):
        """Yield the page-1 listing request for every level-3 category."""
        # Collect MRO items category by category.
        cat3_list = CatalogDao().get_cat_list_from_pool(query={"level": 3})
        if not cat3_list:
            return

        # Visit categories in random order.
        random.shuffle(cat3_list)
        for cat3 in cat3_list:
            yield self._build_list_request(cat3.get('_id'), 1)

    def parse_sku(self, response):
        """Handle one listing page.

        Yields a ``Box`` with the SKUs found on the page and, when handling
        page 1 of a category, the requests for all remaining pages.
        """
        meta = response.meta
        cur_page = meta.get('page')
        cat3_id = meta.get('cat3Id')

        # Inside this method the bare name ``parse_sku`` resolves to the
        # module-level function (presumably star-imported from
        # ``powerchina.rules``), not to this method.
        sku_list = parse_sku(response)
        if not sku_list:
            # Nothing parsed (None or empty): there is nothing to emit and no
            # reason to fan out. Previously a None result on page 1 raised
            # TypeError from ``len(None)``.
            return

        # The value logged here is the current page number, so label it as
        # such (it used to be mislabelled as ``total``).
        self.logger.info('清单: cat=%s, page=%s', cat3_id, cur_page)
        yield Box('sku', self.batch_no, sku_list)

        # Only the first page fans out to the remaining pages, so follow-up
        # pages never schedule further requests.
        if cur_page != 1:
            return

        total_page = parse_total_page(response)
        # Guard against a missing total; assumes parse_total_page returns an
        # int when it succeeds -- TODO confirm.
        if total_page and total_page > 1:
            self.logger.info('页数: cat=%s, total=%s', cat3_id, total_page)
            for page in range(2, total_page + 1):
                yield self._build_list_request(cat3_id, page)
